% Start from a clean workspace with no open figure windows.
clear all;
close all;

% Running figure number: each plotting section calls figure(h) and then
% increments h.
h = 1;

%load the two data sets

deathsdata = readtable('Weeklydeaths2.xlsx');

serologyData = readtable('Serology2022.xlsx');

%clean up the deaths data

% Convert 'data_period_start' and 'data_period_end' to datetime.
% The values are parsed as month/day/two-digit year
% (NOTE(review): confirm this matches how the spreadsheet stores the dates).
deathsdata.data_period_start = datetime(deathsdata.data_period_start, 'InputFormat', 'MM/dd/yy');
deathsdata.data_period_end = datetime(deathsdata.data_period_end, 'InputFormat', 'MM/dd/yy');

% Fill missing values in the numeric rate/count columns with 0.
% The filled column is written back into deathsdata itself; storing it in a
% separate variable would leave the missing values in the table that every
% later filter, regression and plot reads from.
numericColumns = {'COVID_deaths', 'COVID_pct_of_total', 'crude_COVID_rate', 'aa_COVID_rate', 'crude_COVID_rate_ann', 'aa_COVID_rate_ann'};
for col = numericColumns
    colName = col{1};   % col is a 1x1 cell when iterating over a cell row
    deathsdata.(colName) = fillmissing(deathsdata.(colName), 'constant', 0);
end

% Filter data starting from January 1, 2020
startDate = datetime(2020, 1, 1);
deathsdata = deathsdata(deathsdata.data_period_start >= startDate, :);

% Keep the deaths rows for the period ending 4/2/2022 whose Group is 'total',
% dropping the regional/national aggregates and the jurisdictions listed below.

selectedDate = datetime('4/2/2022', 'InputFormat', 'MM/dd/yyyy');
regionsToExclude = {'Region 1', 'Region 2', 'Region 3', 'Region 4', 'Region 5', ...
                    'Region 6', 'Region 7', 'Region 8', 'Region 9', 'Region 10', ...
                    'United States', 'New York City', 'New York', 'Puerto Rico', ...
                    'Alaska', 'Hawaii'};

endsOnSelectedDate = deathsdata.data_period_end == selectedDate;
isTotalGroup       = strcmp(deathsdata.Group, 'total');
isExcludedState    = ismember(deathsdata.State, regionsToExclude);

deathsdataFiltered = deathsdata(endsOnSelectedDate & isTotalGroup & ~isExcludedState, :);

% The separate 'New York' and 'New York City' rows were excluded above; the
% combined row is relabelled 'New York' so it can be matched by State name.
isCombinedNY = strcmp(deathsdataFiltered.State, 'New York and New York City');
deathsdataFiltered.State(isCombinedNY) = {'New York'};

% Serology 2022 data: state-level "past infection" estimates for all races,
% sexes and ages, taken for 2022 Q1 and 2022 Q4.
regionsToExclude2 = {'Overall','Northeast','Midwest','South','West'};

% Row conditions shared by the Q1 and Q4 samples.
isPastInfection = strcmp(serologyData.Indicator, 'Past infection with or without vaccination');
isOverallRace   = strcmp(serologyData.Race, 'Overall');
isOverallSex    = strcmp(serologyData.Sex, 'Overall');
isOverallAge    = strcmp(serologyData.Age, 'Overall');
isStateRow      = ~ismember(serologyData.State, regionsToExclude2);
sharedRows      = isPastInfection & isOverallRace & isOverallSex & isOverallAge & isStateRow;

% Q1 sample
isQ1 = strcmp(serologyData.Time_Period, '2022 Quarter 1');
serologyDataFiltered = serologyData(sharedRows & isQ1, :);

% Q4 sample
isQ4 = strcmp(serologyData.Time_Period, '2022 Quarter 4');
serologyDataFilteredQ4 = serologyData(sharedRows & isQ4, :);

% Merge the datasets based on state names.
% The input table names are kept as-is: the Q1-vs-Q4 plot later in the script
% reads columns named Estimate_serologyDataFiltered and
% Estimate_serologyDataFilteredQ4 from mergedDataQ1Q4.
mergedData = innerjoin(serologyDataFiltered, deathsdataFiltered, 'Keys', 'State');

mergedDataQ1Q4 = innerjoin(serologyDataFiltered, serologyDataFilteredQ4, 'Keys', 'State');

% Short aliases for the merged columns used in this section.
aaRate      = mergedData.aa_COVID_rate;
crudeRate   = mergedData.crude_COVID_rate;
pctInfected = mergedData.Estimate;

% Degree-1 polynomial fit of percent infected on the age-adjusted death rate,
% evaluated at the smallest and largest rate. The figure below does not use it.
p = polyfit(aaRate, pctInfected, 1);
px = [min(aaRate) max(aaRate)];
py = polyval(p, px);

% Least-squares slopes through the origin (no intercept), in both directions.
% The "rev" versions regress the death rate on percent infected and are
% divided by 1000.
slope1      = crudeRate\pctInfected;
slope1aa    = aaRate\pctInfected;
slope1rev   = (pctInfected\crudeRate)/1000;
slope1revaa = (pctInfected\aaRate)/1000;

% Scatter of the states plus the through-origin fitted line.
figure(h)
plot(pctInfected, aaRate, '.', 'MarkerSize', 10)
hold on
% Multiplying by 1000 undoes the scaling applied to slope1revaa above.
plot(pctInfected, slope1revaa*1000*pctInfected, 'LineWidth', 2);
title('Percent Infected vs. Age-Adjusted COVID Death Rate per 100K  as of 4/2/2022')
xlabel('Percent Infected')
ylabel('COVID Death Rate per 100K')
h = h+1;

% Landscape page filled edge to edge, then export as a vector PDF.
hh = gcf;
set(hh, 'PaperOrientation', 'landscape', ...
        'PaperUnits', 'normalized', ...
        'PaperPosition', [0 0 1 1]);
exportgraphics(hh, 'FigureB12Left.pdf', 'ContentType', 'vector')

%now get the 65+ serology data

% Same filter as the overall 2022 Q1 past-infection sample, but restricted to
% the '65 and over' age group.
regionsToExclude2 = {'Overall','Northeast','Midwest','South','West'};
serologyDataFilteredold = serologyData(strcmp(serologyData.Indicator,'Past infection with or without vaccination') &...
                                    strcmp(serologyData.Race, 'Overall') & ...
                                    strcmp(serologyData.Sex, 'Overall') & ...
                                    strcmp(serologyData.Age, '65 and over') & ...
                                    ~ismember(serologyData.State,regionsToExclude2 )&...
                                    strcmp(serologyData.Time_Period, '2022 Quarter 1'), :);

% Pair the overall and 65+ estimates by State rather than by row position.
% The two tables come from independent filters, so nothing guarantees they
% hold the same states in the same order; joining on State keeps each pair
% aligned and drops states present in only one of them.
% (Assumes one row per State in each table, as the other joins in this
% script do.)
overallQ1 = serologyDataFiltered(:, {'State', 'Estimate'});
overallQ1.Properties.VariableNames = {'State', 'EstimateOverall'};
olderQ1 = serologyDataFilteredold(:, {'State', 'Estimate'});
olderQ1.Properties.VariableNames = {'State', 'EstimateOld'};
pairedQ1 = innerjoin(overallQ1, olderQ1, 'Keys', 'State');

% Least-squares slope through the origin of the 65+ estimate on the overall one.
slopeold = pairedQ1.EstimateOverall\pairedQ1.EstimateOld;

% Quadratic fit, evaluated on a dense grid so the plotted curve shows the
% curvature (two end points alone would draw a straight chord).
p = polyfit(pairedQ1.EstimateOverall, pairedQ1.EstimateOld, 2);
px = linspace(min(pairedQ1.EstimateOverall), max(pairedQ1.EstimateOverall), 200);
py = polyval(p, px);

figure(h)
plot(pairedQ1.EstimateOverall, pairedQ1.EstimateOld, '.', 'MarkerSize', 10)
hold on
plot(px,py, 'LineWidth', 2)
title('Percent Infected Overall and 65+ 2022 Q1')
xlabel('Percent Infected Overall')
ylabel('Percent Infected 65+')
h = h+1;
hh=gcf;
% PaperUnits must be set before PaperPosition so [0 0 1 1] means "whole page".
set(hh,'PaperOrientation','landscape');
set(hh,'PaperUnits','normalized');
set(hh,'PaperPosition', [0 0 1 1]);
exportgraphics(gcf,'FigureA2.pdf','ContentType','vector')


% State-level 'Combined seroprevalence' estimates for 2022 Q1 (all races,
% sexes and ages), shown as a horizontal bar chart.
regionsToExclude2 = {'Overall','Northeast','Midwest','South','West'};

isCombinedIndicator = strcmp(serologyData.Indicator, 'Combined seroprevalence');
isOverallGroup = strcmp(serologyData.Race, 'Overall') & ...
                 strcmp(serologyData.Sex, 'Overall') & ...
                 strcmp(serologyData.Age, 'Overall');
isStateRow = ~ismember(serologyData.State, regionsToExclude2);
isQ1 = strcmp(serologyData.Time_Period, '2022 Quarter 1');

serologyDataFilteredeither = serologyData(isCombinedIndicator & isOverallGroup & isStateRow & isQ1, :);

figure(h);
stateLabels = categorical(serologyDataFilteredeither.State);
barh(stateLabels, serologyDataFilteredeither.Estimate);
xlabel('Combined Seroprevalence 2022 Q1');
title('Combined Seroprevalence 2022 Q1');
h = h+1;

% Landscape page filled edge to edge, then export as a vector PDF.
hh = gcf;
set(hh, 'PaperOrientation', 'landscape', ...
        'PaperUnits', 'normalized', ...
        'PaperPosition', [0 0 1 1]);
exportgraphics(hh, 'FigureB11Right.pdf', 'ContentType', 'vector')


% Horizontal bar chart of the overall 2022 Q1 past-infection estimate by state.
figure(h);
stateLabels = categorical(serologyDataFiltered.State);
barh(stateLabels, serologyDataFiltered.Estimate);
xlabel('Percent Infected Overall 2022 Q1');
title('Percent Infected Overall 2022 Q1');
h = h+1;

% Landscape page filled edge to edge, then export as a vector PDF.
hh = gcf;
set(hh, 'PaperOrientation', 'landscape', ...
        'PaperUnits', 'normalized', ...
        'PaperPosition', [0 0 1 1]);
exportgraphics(hh, 'FigureB11Left.pdf', 'ContentType', 'vector')



% Per-state ratio of the age-adjusted death rate to the percent infected,
% divided by 1000, shown as a horizontal bar chart.
% (Presumably the /1000 turns "deaths per 100K" over "percent" into a
% fraction - confirm against the units of the input data.)
impliedIFR = mergedData.aa_COVID_rate./mergedData.Estimate/1000;

figure(h);
stateLabels = categorical(mergedData.State);
barh(stateLabels, impliedIFR);
xlabel('Implied Age Adjusted IFR 2022 Q1');
title('Implied Age Adjusted IFR 2022 Q1');
h = h+1;

% Landscape page filled edge to edge, then export as a vector PDF.
hh = gcf;
set(hh, 'PaperOrientation', 'landscape', ...
        'PaperUnits', 'normalized', ...
        'PaperPosition', [0 0 1 1]);
exportgraphics(hh, 'FigureB12Right.pdf', 'ContentType', 'vector')




% 'Presumed vaccination without infection' estimates for 2022 Q1 at state
% level: once for all ages (plotted below) and once for the 65+ group.
regionsToExclude2 = {'Overall','Northeast','Midwest','South','West'};

% Conditions shared by both age groups.
isVaccinatedOnly = strcmp(serologyData.Indicator, 'Presumed vaccination without infection');
isOverallRaceSex = strcmp(serologyData.Race, 'Overall') & strcmp(serologyData.Sex, 'Overall');
isStateRow = ~ismember(serologyData.State, regionsToExclude2);
isQ1 = strcmp(serologyData.Time_Period, '2022 Quarter 1');
sharedRows = isVaccinatedOnly & isOverallRaceSex & isStateRow & isQ1;

% All ages
serologyDataFiltervonly = serologyData(sharedRows & strcmp(serologyData.Age, 'Overall'), :);

% This figure is displayed only; this section does not export it to a file.
figure(h);
barh(categorical(serologyDataFiltervonly.State), serologyDataFiltervonly.Estimate);
xlabel('Presumed vaccinated with no infection 2022 Q1');
title('Presumed vaccinated with no infection 2022 Q1');
h = h+1;

% 65 and over
regionsToExclude2 = {'Overall','Northeast','Midwest','South','West'};
serologyDataFiltervonlyold = serologyData(sharedRows & strcmp(serologyData.Age, '65 and over'), :);



% Scatter of each state's 2022 Q1 estimate against its 2022 Q4 estimate,
% with both axes fixed to the range 20-100.
q1Estimate = mergedDataQ1Q4.Estimate_serologyDataFiltered;
q4Estimate = mergedDataQ1Q4.Estimate_serologyDataFilteredQ4;

figure(h)
plot(q1Estimate, q4Estimate, '.', 'MarkerSize', 10)
axisRange = [20 100];
xlim(axisRange)
ylim(axisRange)
title('Percent Infected Overall 2022 Q1 and Q4')
xlabel('Percent Infected 2022 Q1')
ylabel('Percent Infected 2022 Q4')
h = h+1;

% Landscape page filled edge to edge, then export as a vector PDF.
hh = gcf;
set(hh, 'PaperOrientation', 'landscape', ...
        'PaperUnits', 'normalized', ...
        'PaperPosition', [0 0 1 1]);
exportgraphics(hh, 'FigureB13.pdf', 'ContentType', 'vector')
